Principal components analysis (PCA) uses axis rotation to create principal components, which are linear combinations of the original variables.
# Run a PCA on the penguin size measurements: keep body mass plus every column
# whose name ends in "_mm", drop rows with a missing value in any of those
# columns, and standardize each column with scale() before handing the
# resulting matrix to prcomp().
penguin_pca <- prcomp(
  penguins %>%
    select(body_mass_g, ends_with("_mm")) %>%
    drop_na() %>%
    scale()
)

# Print the loadings of each original variable on each principal component.
penguin_pca[["rotation"]]
## PC1 PC2 PC3 PC4
## body_mass_g 0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm 0.4552503 0.597031143 0.6443012 -0.1455231
## bill_depth_mm -0.4003347 0.797766572 -0.4184272 0.1679860
## flipper_length_mm 0.5760133 0.002282201 -0.2320840 0.7837987
# Build a dataset whose observations match the ones used to create the PCA
# biplot, but that still contains the other variables. Those are good to have
# because other variables can be used to update the aesthetics of the autoplot.
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))
# autoplot() recognizes the type of object it is given (here a PCA result) and
# assumes the type of output to create. Points are coloured by the `species`
# column of penguin_complete; the variable loadings are drawn and labelled.
autoplot(
  penguin_pca,
  data = penguin_complete,
  colour = "species",
  loadings = TRUE,
  loadings.label = TRUE
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
The first two principal components (the axes of this biplot) together capture about 88% of the variance in the data.
# Read the landings spreadsheet from the project's data folder and tidy it up.
fish_noaa <- here("data", "foss_landings.xlsx") %>%
  read_excel() %>%
  clean_names() %>%
  mutate(
    # Lower-case every character column so text values are consistent
    # (the filter below relies on this to match "public").
    across(where(is.character), tolower),
    # Drop the last three characters of each species name.
    # NOTE(review): presumably a trailing marker on the name; confirm against
    # the raw data.
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  # Keep only the records whose confidentiality value is "public".
  filter(confidentiality == "public")
Make a customized graph:
# Line chart of pounds by year, with one coloured line per nmfs_name.
# The legend is switched off.
fish_plot <- ggplot(fish_noaa, aes(x = year, y = pounds)) +
  geom_line(aes(colour = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# Display the static version of the plot.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).
# Convert the static ggplot object into an interactive plotly graph.
ggplotly(p = fish_plot)